#!/usr/bin/env python
# encoding=utf8

## Database: status error codes
#  0 : not parsed
#  1 : parsed correctly
#  2 : page not found
#  3 : attempted but something went wrong

# Define variables
# Path of the SQLite database this script creates and fills
# (tables: log, edgelist).
db = "fb_users_edgelist_db"
# Path of the existing SQLite database that is read once, when `db` does not
# exist yet, to obtain the initial list of users (table fbUsersTimeline).
db_source = "fb_users_timeline.sqlite"

import os
import sqlite3
import re
from random import randint
from time import sleep
import datetime
from datetime import time, date
from random import randrange
import sys
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException


def main():
    """Scrape the friends list of every pending user and store the results.

    Starts a Chrome WebDriver, creates and populates the edgelist database
    from the source database when it does not exist yet, then visits the
    "/friends" page of every user whose log status is 0. Each user is
    recorded with status 1 on success or status 3 when anything goes wrong.
    The browser is always shut down, even if the run is interrupted.
    """
    chromedriver = "chromedriver"
    os.environ["webdriver.chrome.driver"] = chromedriver

    driver = webdriver.Chrome(chromedriver)

    try:
        driver.implicitly_wait(30)

        # Check whether database exists already
        if not os.path.isfile(db):
            print("Database doesn't exist. Creating and populating one...")
            users = getUsersFromSource(db_source)
            createDb(db)
            populateDb(users, db)

        users = getUsersFromLog(db)

        for fb_id in users:

            url = composeUrl(fb_id) + "/friends"

            print("Parsing " + url)

            try:
                driver.get(url)
                friends = inspectPage(driver)
            except Exception as e:
                # Catch Exception rather than everything so Ctrl-C still
                # stops the run. Pass an empty list (not None) so the log
                # row really is updated to status 3.
                print("Failed to parse " + url + ": " + str(e))
                enterData(db, fb_id, [], 3)
            else:
                enterData(db, fb_id, friends, 1)

            # Randomised pause between two users.
            sleep(randint(15, 20))
    finally:
        # Do not leave a browser process behind when the run aborts.
        driver.quit()

    
def enterData(db, fb_id, friends, status):
    """Store a user's scraped friends and record the outcome in the log.

    Inserts one ``edgelist`` row per friend (from the user's profile URL to
    the friend's URL) and updates the user's ``log`` row with the status
    code, the current timestamp and the number of friends.

    Args:
        db: Path of the SQLite database file.
        fb_id: Facebook id or profile URL of the user, as stored in ``log``.
        friends: Iterable of friend URLs, or None when parsing failed.
        status: Status code to store in the log row.

    Database errors are printed instead of raised, so one failing user does
    not stop the caller. Nothing is committed when an error occurs.
    """
    conn = None

    try:
        # A failed parse hands over None. Treat it as "no friends" so the
        # status update below still runs instead of failing on iteration.
        friends = [] if friends is None else list(friends)

        conn = sqlite3.connect(db)
        cursor = conn.cursor()

        if friends:
            source_url = composeUrl(fb_id)
            for friend in friends:
                now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                cursor.execute("INSERT INTO edgelist ([from], [to], timestamp) VALUES (?,?,?);", (source_url, friend, now))

        now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        cursor.execute("UPDATE log SET status = ?, timestamp = ?, number_friends = ? WHERE fb_id = ?;", (status, now, len(friends), fb_id))
        conn.commit()

    except Exception as e:
        print(str(e))

    finally:
        # Closing without a commit discards any partial inserts.
        if conn is not None:
            conn.close()


def composeUrl(fb_id):
    """Return the full Facebook profile URL for ``fb_id``.

    A value that already contains '://www.facebook.com/' is returned
    unchanged; anything else is appended to 'https://www.facebook.com/'.
    """
    facebook_host = '://www.facebook.com/'

    # A plain substring test is equivalent to searching for the escaped
    # literal with a regular expression.
    if facebook_host in fb_id:
        return fb_id

    return 'https://www.facebook.com/' + fb_id
    


def getUsersFromLog(db):
    """Return the ids of the users in the log that have not been parsed yet.

    Args:
        db: Path of the SQLite database holding the ``log`` table.

    Returns:
        A list with the UTF-8 encoded ``fb_id`` of every ``log`` row whose
        status is 0. When the database cannot be queried the error is
        printed and an empty list is returned.
    """
    conn = None

    try:
        conn = sqlite3.connect(db)
        cursor = conn.cursor()
        cursor.execute("SELECT fb_id FROM log WHERE STATUS = 0")
        rows = cursor.fetchall()
    except Exception as e:
        print(str(e))
        # Nothing could be read; do not touch a cursor that may not exist.
        return []
    finally:
        if conn is not None:
            conn.close()

    return [fb_id.encode("utf-8") for (fb_id,) in rows]
    
def getUsersFromSource(db):
    """Return the ids of the users to import from the source database.

    Args:
        db: Path of the SQLite database holding the ``fbUsersTimeline``
            table.

    Returns:
        A list with the UTF-8 encoded ``fb_id`` of every ``fbUsersTimeline``
        row whose status is 1. When the database cannot be queried the
        error is printed and an empty list is returned.
    """
    conn = None

    try:
        conn = sqlite3.connect(db)
        cursor = conn.cursor()
        cursor.execute("SELECT fb_id FROM fbUsersTimeline WHERE STATUS = 1")
        rows = cursor.fetchall()
    except Exception as e:
        print(str(e))
        # Nothing could be read; do not touch a cursor that may not exist.
        return []
    finally:
        if conn is not None:
            conn.close()

    return [fb_id.encode("utf-8") for (fb_id,) in rows]


def createDb(db):
    """Create the ``edgelist`` and ``log`` tables if they do not exist yet.

    Args:
        db: Path of the SQLite database file; SQLite creates the file when
            it is missing.

    Errors are printed instead of raised. The connection is always closed.
    """
    conn = None

    try:
        conn = sqlite3.connect(db)
        cursor = conn.cursor()
        # One row per directed edge: user profile URL -> friend URL.
        cursor.execute('''
        CREATE TABLE IF NOT EXISTS edgelist (
                          pik INTEGER PRIMARY KEY AUTOINCREMENT,
                          [from] CHAR,
                          [to] CHAR,
                          timestamp DATETIME
                       );''')
        # One row per user to parse; status defaults to 0 (not parsed).
        cursor.execute('''CREATE TABLE IF NOT EXISTS log (
                          fb_id CHAR PRIMARY KEY,
                          status INT DEFAULT 0,
                          number_friends INT,
                          timestamp DATETIME
                       );''')
        conn.commit()

    except Exception as e:
        print(str(e))

    finally:
        if conn is not None:
            conn.close()

            
def populateDb(users, db):
    """Insert one ``log`` row per user, leaving the other columns at defaults.

    Args:
        users: Iterable of Facebook ids to register in the log.
        db: Path of the SQLite database holding the ``log`` table.

    All rows are inserted in a single transaction. When any insert fails
    the error is printed and nothing is committed. The connection is always
    closed.
    """
    conn = None

    try:
        conn = sqlite3.connect(db)
        cursor = conn.cursor()
        cursor.executemany("INSERT INTO log (fb_id) VALUES (?);", ((user,) for user in users))
        conn.commit()

    except Exception as e:
        print(str(e))

    finally:
        if conn is not None:
            conn.close()
                

def scrapeFriends(soup):
    """Collect the friend profile URLs found in a parsed friends page.

    Looks at every anchor with an ``href`` inside the ``div`` elements of
    class "fsl fwb fcb" and keeps the part of the link that precedes
    "?fref=". Links without that marker are skipped.

    NOTE(review): links where "fref" is not the first query parameter
    (i.e. "&fref=") never match and are dropped -- confirm this is intended.

    Args:
        soup: Parsed page exposing BeautifulSoup's ``find_all`` API.

    Returns:
        The list of matched URL prefixes, in document order.
    """
    matches = (
        re.search("^(.+)\\?fref=", link['href'])
        for name_box in soup.find_all("div", { "class" : "fsl fwb fcb" })
        for link in name_box.find_all('a', href=True)
    )

    return [found.group(1) for found in matches if found]



def inspectPage(driver):
    """Scroll the currently loaded friends page and return the friend URLs.

    First tries to fill in and submit the login form. Then scrolls to the
    bottom, waits five seconds and re-scrapes the page, repeating until the
    number of friends found stops changing between two passes. A page with
    no friends on the first pass therefore returns an empty list at once.

    Args:
        driver: Selenium WebDriver with the friends page already loaded.

    Returns:
        The list of friend URLs produced by ``scrapeFriends``.

    Raises:
        Exception: When scrolling, reading or scraping the page fails.
    """
    # Best-effort login: failures are ignored on purpose (presumably the
    # form is simply absent once the session is logged in). Only Exception
    # is caught, so Ctrl-C still interrupts the run.
    try:
        driver.find_element_by_xpath(".//*[@id='email']").send_keys("") # REDACTED
        driver.find_element_by_xpath(".//*[@id='pass']").send_keys("") # REDACTED
        driver.find_element_by_id("u_0_0").click()
    except Exception:
        pass

    previous_count = 0

    while True:
        try:
            driver.execute_script("scroll (0,1000000)")
            # Give the page time to load the next batch of friends.
            sleep(5)
            html = driver.page_source
            soup = BeautifulSoup(html)
            friends = scrapeFriends(soup)
        except Exception:
            # KeyboardInterrupt/SystemExit are not Exception subclasses and
            # pass through untouched instead of being reported as a
            # scraping failure.
            raise Exception("I don't see any friend box on this page...")

        current_count = len(friends)
        if current_count == previous_count:
            # No new friends appeared after scrolling: the list is complete.
            return friends
        previous_count = current_count

# Run the scraper only when this file is executed as a script, so that
# importing the module does not launch a browser.
if __name__ == "__main__":
    main()

